"""Model of new novel appearance."""
import numpy as np
import scipy.optimize
import scipy.stats

import pystan_cache


def neg_binomial_2_rng(mu, phi):
    """Generate a sample from neg_binomial_2(mu, phi).

    This function is defined here in Python rather than in Stan because Stan
    reports overflow errors.

    $E(Y) = mu$
    $Var(Y) = mu + mu^2 / phi

    This function will work fine with arrays.

    """
    return np.random.poisson(np.random.gamma(phi, mu / phi))


def _find_gamma_distribution(p5, p95):
    """Calculate Gamma parameters for distribution with given 5th and 95th percentiles.

    Returns:
        float, float: shape, rate (alpha, beta) parameters

    """
    def objective(alpha, beta):
        error = (0.05 - scipy.stats.gamma.cdf(a=alpha, scale=1 / beta, x=p5)) ** 2
        error += (0.95 - scipy.stats.gamma.cdf(a=alpha, scale=1 / beta, x=p95)) ** 2
        return error

    def initial_guess(location, scale):
        """Make an initial guess for alpha, beta based on a normal approximation."""
        alpha = (location / scale) ** 2
        beta = location / scale ** 2
        return alpha, beta

    alpha0, beta0 = initial_guess((p95 + p5) / 2, (p95 - p5) / 4)
    result = scipy.optimize.minimize(lambda args: objective(*args), x0=(alpha0, beta0), bounds=((1e-7, None),) * 2)
    if not result.success:
        raise RuntimeError('Unable to find minimum: `{}`'.format(result))
    return result.x


def _find_normal_distribution(p25, p50, p75):
    """Find parameters for normal distribution with indicated quartiles."""
    def objective(mu, sigma):
        percentiles = [0.25, 0.5, 0.75]
        percentiles_current = [scipy.stats.norm.cdf(x=q, loc=mu, scale=sigma) for q in [p25, p50, p75]]
        error = sum((p - p_curr) ** 2 for p, p_curr in zip(percentiles, percentiles_current))
        return error

    mu0, sigma0 = p50, (p75 - p25) / 2
    result = scipy.optimize.minimize(lambda args: objective(*args),
                                     x0=(mu0, sigma0),
                                     bounds=[(None, None), (1e-7, None)])
    if not result.success:
        raise RuntimeError('Unable to find minimum: `{}`'.format(result))
    return result.x


def _find_gamma_distribution_for_log_bassett(p25, p50, p75):
    """Find Gamma distribution characterizing Bassett's distribution in log terms.

    Bassett's quartiles provide a Normal(mu, sigma). We are modeling log novel
    counts, so we need a distribution which approximates the log of Normal(mu,
    sigma). We use a Gamma distribution.

    NOTE: The normal distribution in question is constrained to have only
    positive support.

    Args:
        float, float, float: quartiles

    Returns:
        float, float: shape (alpha) and rate (beta) parameters

    """
    mu, sigma = _find_normal_distribution(p25, p50, p75)
    sim = np.random.normal(mu, sigma, size=int(1e6))
    log_sim = np.log(sim[sim > 0])
    p5, p95 = np.percentile(log_sim, [5, 95])
    return _find_gamma_distribution(p5, p95)


def make_model():
    """Make model of new novel publications."""
    model_code = """
    data {
      int y[37];            // new titles, all authors 1800-1836 (inclusive) (only 1830-1836 used)
      int y_unknown[30];    // new titles, unknown gender authors 1800-1829 (inclusive)
      int y_men[30];        // new titles, men authors 1800-1829 (inclusive)
      int y_women[30];      // new titles, women authors 1800-1829 (inclusive)
      int pc[77];           // publishers circular total counts 1843 - 1919 (inclusive)
      int casey_men[9];     // ellen miller casey title counts for 1860, 1865, ..., 1900
      int casey_women[9];   // ellen miller casey title counts for 1860, 1865, ..., 1900
      int casey_unknown[9]; // ellen miller casey title counts for 1860, 1865, ..., 1900
      int nstc[70];         // NSTC 1801-1870, ignore (!) years ending in 0 and 5 as these count undated material
      vector[120] year;     // years, indexed as follows: 1800 == 1, 1802 == 2, ... 1919 == 120
    }
    transformed data {
        // cov_exp_quad wants real valued inputs
        real ryear[120];
        ryear = to_array_1d(year);
    }
    parameters {
      // log novels GP
      vector[120] lambda_tilde;  // new novels rate residual GP
      real alpha_lambda;  // intercept
      real beta_lambda; // year coefficient
      real<lower=0> sigma_lambda;
      real<lower=0> l_lambda;

      // log odds novels of pc GP
      // new novels are a percentage of publishers circular, expressed in log odds
      vector[120] log_odds_novels_pc_tilde;  // log_odds_novels_pc residual GP
      real alpha_log_odds_novels_pc;
      real beta_log_odds_novels_pc;
      real<lower=0> sigma_log_odds_novels_pc;
      real<lower=0> l_log_odds_novels_pc;

      // novels unknown_gender GP
      vector[120] log_odds_unknown_gender_tilde; // residual GP
      real alpha_log_odds_unknown_gender;
      real beta_log_odds_unknown_gender;
      real<lower=0> sigma_log_odds_unknown_gender;
      real<lower=0> l_log_odds_unknown_gender;

      // novels men_of_known_gender GP
      // Note: prob_man = inv_logit(log_odds_men_of_known_gender) * (1 - inv_logit(log_odds_unknown_gender))
      // Note: prob_woman = (1 - inv_logit(log_odds_men_of_known_gender)) * (1 - inv_logit(log_odds_unknown_gender))
      vector[120] log_odds_men_of_known_gender_tilde; // residual GP
      real alpha_log_odds_men_of_known_gender;
      real beta_log_odds_men_of_known_gender;
      real<lower=0> sigma_log_odds_men_of_known_gender;
      real<lower=0> l_log_odds_men_of_known_gender;

      // non-GP parameters
      real<lower=0> phi_y_inv;  // negative binomial overdispersion new novel counts, all genders

      real<lower=0> phi_casey_inv;  // negative binomial overdispersion casey reviewed novel counts
      real<lower=0> pct_casey_novels;  // casey counts (Antheneum reviews of new novels) are this percentage of total new novel counts

      real<lower=0> phi_pc_inv;  // negative binomial overdispersion for publishers circular

      real<lower=0> phi_nstc_inv;  // negative binomial overdispersion for nstc
      real<lower=0> alpha_nstc;  // nstc are this proportion of publishers circular total publications

    }
    transformed parameters {
      vector[120] lambda;  // primary variable of interest: log mean novels plus residual GP
      vector[120] log_odds_novels_pc; // new novels are a percentage of publishers circular, expressed in log odds, log(p/(1-p))
      vector[120] log_odds_unknown_gender;
      vector[120] log_odds_men_of_known_gender;

      // lambda trend + residuals
      lambda = alpha_lambda + beta_lambda * year + lambda_tilde;

      // log_odds_novels_pc trend + residuals
      log_odds_novels_pc = alpha_log_odds_novels_pc + beta_log_odds_novels_pc * year + log_odds_novels_pc_tilde;

      // gender annotation GPs
      log_odds_unknown_gender = alpha_log_odds_unknown_gender + beta_log_odds_unknown_gender * year + log_odds_unknown_gender_tilde;
      log_odds_men_of_known_gender = alpha_log_odds_men_of_known_gender + beta_log_odds_men_of_known_gender * year + log_odds_men_of_known_gender_tilde;

    }
    model {
      matrix[120, 120] Sigma;
      matrix[120, 120] L_Sigma;

      ///////////////////////////////////////////////////
      // novels GP
      // GP hyperparameters
      sigma_lambda ~ normal(0, 1);   // marginal variance
      l_lambda ~ gamma(2.44, 0.54); // characteristic length-scale, 90% mass between 1 and 10

      // calculate GP mean
      alpha_lambda ~ normal(0, 1);
      beta_lambda ~ normal(0, 1);

      // calculate GP covariance
      // the last term (`diag_matrix`) is modeling noise, sigma_n in Rasmussen and Williams. It prevents pathologies.
      Sigma = cov_exp_quad(ryear, sigma_lambda, l_lambda) + diag_matrix(rep_vector(0.01, 120));

      // decompose Sigma
      L_Sigma = cholesky_decompose(Sigma);
      lambda_tilde ~ multi_normal_cholesky(rep_vector(0, 120), L_Sigma);
      ///////////////////////////////////////////////////

      ///////////////////////////////////////////////////
      // log_odds_novels_pc GP
      // GP hyperparameters
      sigma_log_odds_novels_pc ~ normal(0, 1);   // marginal variance
      l_log_odds_novels_pc ~ gamma(5.2, 0.26); // characteristic length-scale, 90% mass between 8 and 36 (years)

      // calculate GP mean
      // the rate of new novel publication exp(lambda) is a fraction (pct_novels) of
      // the total number of editions published in publishers circular
      alpha_log_odds_novels_pc ~ normal(0, 1);
      beta_log_odds_novels_pc ~ normal(0, 1);

      // calculate GP covariance
      // the last term (`diag_matrix`) is modeling noise, sigma_n in Rasmussen and Williams. It prevents pathologies.
      Sigma = cov_exp_quad(ryear, sigma_log_odds_novels_pc, l_log_odds_novels_pc) + diag_matrix(rep_vector(0.01, 120));

      // decompose Sigma
      L_Sigma = cholesky_decompose(Sigma);
      log_odds_novels_pc_tilde ~ multi_normal_cholesky(rep_vector(0, 120), L_Sigma);
      ///////////////////////////////////////////////////

      ///////////////////////////////////////////////////
      // log odds_unknown gender GP
      // GP hyperparameters
      sigma_log_odds_unknown_gender ~ normal(0, 1);   // marginal variance
      l_log_odds_unknown_gender ~ gamma(5.2, 0.26); // characteristic length-scale, 90% mass between 8 and 36 (years)

      // calculate GP mean
      alpha_log_odds_unknown_gender ~ normal(0, 1);
      beta_log_odds_unknown_gender ~ normal(0, 1);

      // calculate GP covariance
      Sigma = cov_exp_quad(ryear, sigma_log_odds_unknown_gender, l_log_odds_unknown_gender) + diag_matrix(rep_vector(0.01, 120));

      // decompose Sigma
      L_Sigma = cholesky_decompose(Sigma);
      log_odds_unknown_gender_tilde ~ multi_normal_cholesky(rep_vector(0, 120), L_Sigma);
      ///////////////////////////////////////////////////

      ///////////////////////////////////////////////////
      // log odds_unknown gender GP
      // GP hyperparameters
      sigma_log_odds_men_of_known_gender ~ normal(0, 1);   // marginal variance
      l_log_odds_men_of_known_gender ~ gamma(5.2, 0.26); // characteristic length-scale, 90% mass between 8 and 36 (years)

      // calculate GP mean
      alpha_log_odds_men_of_known_gender ~ normal(0, 1);
      beta_log_odds_men_of_known_gender ~ normal(0, 1);

      // calculate GP covariance
      Sigma = cov_exp_quad(ryear, sigma_log_odds_men_of_known_gender, l_log_odds_men_of_known_gender) + diag_matrix(rep_vector(0.01, 120));

      // decompose Sigma
      L_Sigma = cholesky_decompose(Sigma);
      log_odds_men_of_known_gender_tilde ~ multi_normal_cholesky(rep_vector(0, 120), L_Sigma);
      ///////////////////////////////////////////////////

      ///////////////////////////////////////////////////
      // sampling distribution
      // negative-binomial model of the novel counts 1800-1836 (inclusive)
      phi_y_inv ~ normal(0, 10);
      y[31:37] ~ neg_binomial_2(exp(lambda[31:37]), inv(phi_y_inv)); // 1830-1836 (7 years)

      // Note: prob_man = inv_logit(log_odds_men_of_known) * (1 - inv_logit(log_odds_unknown_gender))
      // Note: prob_woman = (1 - inv_logit(log_odds_men_of_known)) * (1 - inv_logit(log_odds_unknown_gender))
      y_unknown ~ neg_binomial_2(exp(lambda[1:30]) .* inv_logit(log_odds_unknown_gender[1:30]), inv(phi_y_inv));
      y_men ~ neg_binomial_2(exp(lambda[1:30]) .* (1 - inv_logit(log_odds_unknown_gender[1:30])) .* inv_logit(log_odds_men_of_known_gender[1:30]), inv(phi_y_inv));
      y_women ~ neg_binomial_2(exp(lambda[1:30]) .* (1 - inv_logit(log_odds_unknown_gender[1:30])) .* (1 - inv_logit(log_odds_men_of_known_gender[1:30])), inv(phi_y_inv));

      // ellen miller casey (Antheneum reviews)
      pct_casey_novels ~ gamma(15.5, 32);  // prior putting 90% of mass between 0.3 and 0.7
      phi_casey_inv ~ normal(0, 10);
      for (i in 1:9) {
        casey_unknown[i] ~ neg_binomial_2(pct_casey_novels * exp(lambda[61 + 5 * (i - 1)]) * inv_logit(log_odds_unknown_gender[61 + 5 * (i - 1)]), inv(phi_casey_inv));
        casey_men[i] ~ neg_binomial_2(pct_casey_novels * exp(lambda[61 + 5 * (i - 1)]) *
                                                         (1 - inv_logit(log_odds_unknown_gender[61 + 5 * (i - 1)])) *
                                                         inv_logit(log_odds_men_of_known_gender[61 + 5 * (i - 1)]), inv(phi_casey_inv));
        casey_women[i] ~ neg_binomial_2(pct_casey_novels * exp(lambda[61 + 5 * (i - 1)]) *
                                                           (1 - inv_logit(log_odds_unknown_gender[61 + 5 * (i - 1)])) *
                                                           (1 - inv_logit(log_odds_men_of_known_gender[61 + 5 * (i - 1)])), inv(phi_casey_inv));
      }
      // publishers circular and nstc

      //// publishers circular, 1843 - 1919 (inclusive)
      phi_pc_inv ~ normal(0, 10);
      // scale up from novels to publishers circular
      pc[1:77] ~ neg_binomial_2(inv(inv_logit(log_odds_novels_pc[44:120])) .* exp(lambda[44:120]), inv(phi_pc_inv));

      // NSTC 1801-1870
      phi_nstc_inv ~ normal(0, 10);
      // nstc counts are a constant multiple of publishers circular counts
      alpha_nstc ~ gamma(1, 0.5);  // weakly informative prior

      // use the common index, 1 is 1800, 2 is 1801, ..., 6 is 1805, ..., 71 is 1870
      for (i in 1:71) {
        if ((i - 1) % 5 != 0) {  // do not use NSTC data from years ending with 0 or 5 (1 is 1800, 2 is 1801, ...), see paper for details
          nstc[i - 1] ~ neg_binomial_2(inv(inv_logit(log_odds_novels_pc[i])) * exp(lambda[i]) * alpha_nstc, inv(phi_nstc_inv));
        }
      }

      // Troy Bassett elicited priors on the years 1886, 1891, 1895
      // NOTE: Bassett uses a more inclusive definition of the novel than
      // Garside, Raven, and Schoewerling. He states GRS exclude 10-15% of the
      // works he includes as novels. As a middle value, a discount of 12.5%
      // is used.
      // {1886: [450, 550, 700],
      //  1891: [550, 600, 700],
      //  1894: [750, 900, 1100]}
      // Discounted by 12.5%
      // {1886: [393.75, 481.25, 612.5],
      //  1891: [481.25, 525.0, 612.5],
      //  1894: [656.25, 787.5, 962.5]}
      // These Gamma priors capture the 12.5% discounted version:
      lambda[87] ~ gamma(278.36, 46.3); // 1886
      lambda[92] ~ gamma(1530.73, 245.4); // 1891
      lambda[95] ~ gamma(667.9, 101.8); // 1894
    }
    // Note on using the `generated quantities` block to generate draws
    // using neg_binomial_2_rng: Stan will likely generate errors such as:
    // neg_binomial_2_rng: Random number that came from gamma distribution is 7.03679e+09, but must
    // be less than 1.07374e+09. Fix is to generate the draws outside of Stan.
    """
    return pystan_cache.make_model(model_code)


if __name__ == '__main__':
    print('running superficial tests.')
    assert make_model() is not None
    print('done.')
